# Start from an empty global workspace (hidden objects included) so that
# nothing left over from an earlier session leaks into this script.
rm(list = ls(all.names = TRUE))
library(foreign)
library(car)


# Load the source CSV into `covid2`.
covid2 <- read.csv(
  "C:/Users/weary/Dropbox/Life as a Professor/USAvirus/USAvirus.csv"
)

# Create Date: parse the day/month/year strings in `time` into Date values,
# then tabulate how many rows fall on each date.
covid2$date <- as.Date(covid2$time, "%d/%m/%Y")
table(covid2$date)


# Return the leading `num_char` characters of each element of `text`.
#   text     - character vector
#   num_char - number of characters to keep, counted from the start
left <- function(text, num_char) {
  substr(text, start = 1, stop = num_char)
}

# Return `num_char` characters of each element of `text`, beginning at the
# 1-based position `start_num`.
#   text      - character vector
#   start_num - position of the first character to keep
#   num_char  - number of characters to keep
mid <- function(text, start_num, num_char) {
  end_num <- start_num + num_char - 1
  substr(text, start_num, end_num)
}

# Return the trailing `num_char` characters of each element of `text`.
# Strings shorter than `num_char` are returned whole.
#   text     - character vector (nchar() is applied to it directly)
#   num_char - number of characters to keep, counted from the end
right <- function(text, num_char) {
  text_len <- nchar(text)
  first_pos <- text_len - (num_char - 1)
  substr(text, first_pos, text_len)
}

# Number of external content
# Take the last 23 characters of every tweet text as a candidate link and
# keep it only when it begins with "https"; otherwise the tweet is treated
# as having no external link (NA).
tweet_tail <- right(as.character(covid2$text), 23)
tweet_tail[!startsWith(tweet_tail, "https")] <- NA
covid2$rturl <- tweet_tail
table(is.na(covid2$rturl))  # 184 no external link
# 3383/3567   # 94.8%



# Chinese comment
# Collapse `Chinese_use` into an indicator: values above 0 become 1, exact
# zeros stay 0, and anything else (NA or negative) is left as NA.
covid2$Chinese_use2 <- ifelse(
  covid2$Chinese_use > 0,
  1,
  ifelse(covid2$Chinese_use == 0, 0, NA)
)
table(covid2$Chinese_use2)
# Hard-coded ratio; presumably the count of Chinese_use2 == 1 over the total
# number of rows from an earlier run -- confirm against the table above.
2414/3567

# Link coding
# Attach the `Chinese_photo` code from link_complete.csv to each tweet by
# looking up the tweet's trailing link (`rturl`) in `linkc$Var1`.
#
# The lookup is vectorised with match() instead of looping over a fixed
# 1:3567 row range, so it covers every row of `covid2` whatever its size.
# match() returns the first matching row and NA when a link is absent from
# the lookup table; the previous element-wise assignment stopped with
# "replacement has length zero" in that case.
linkc <- read.csv("link_complete.csv")
covid2$chinese_photo <- NA

# Tweets without a link keep chinese_photo = NA and are never looked up.
has_link <- !is.na(covid2$rturl)
link_row <- match(covid2$rturl[has_link], linkc$Var1)
if (anyNA(link_row)) {
  warning(
    sum(is.na(link_row)),
    " tweet link(s) not found in link_complete.csv; ",
    "chinese_photo left as NA for them",
    call. = FALSE
  )
}
covid2$chinese_photo[has_link] <- linkc$Chinese_photo[link_row]

# Number of Chinese photo
table(covid2$chinese_photo)

# Save the combined data, then cross-tabulate photo coding (rows) against
# the Chinese-text indicator (columns).
write.csv(covid2, "usaviruscombined.csv")
table(covid2$chinese_photo, covid2$Chinese_use2)
# Output recorded from a previous run:
#       0    1
# 0   605  149
# 1   365 2264


# Cuba
# Flag tweets whose text contains "cuba" or "Cuba" (1 = mentioned, 0 = not).
# Other capitalisations such as "CUBA" are not matched.
library(stringi)

covid2$cuba <- as.numeric(grepl("cuba|Cuba", covid2$text))
table(covid2$cuba)


#==============FINDING2========================#
# Compare follower and friend counts between tweets coded chinese_photo == 0
# and tweets coded chinese_photo == 1.

# Missing counts are treated as zero.
covid2$user_followers_count[is.na(covid2$user_followers_count)] <- 0
covid2$user_friends_count[is.na(covid2$user_friends_count)] <- 0

# Group masks. `%in%` never yields NA, so tweets whose chinese_photo is NA
# are simply excluded. With `==` the mask itself contains NA, and indexing
# by it inserts spurious NA entries into each group (visible as an "NA's"
# line in summary()). The t-tests are unaffected because t.test() drops NA.
photo_no  <- covid2$chinese_photo %in% 0
photo_yes <- covid2$chinese_photo %in% 1

# Followers
summary(covid2$user_followers_count)

summary(covid2$user_followers_count[photo_no])
summary(covid2$user_followers_count[photo_yes])

t.test(
  covid2$user_followers_count[photo_no],
  covid2$user_followers_count[photo_yes]
)

# Friends
summary(covid2$user_friends_count)

summary(covid2$user_friends_count[photo_no])
summary(covid2$user_friends_count[photo_yes])
t.test(
  covid2$user_friends_count[photo_no],
  covid2$user_friends_count[photo_yes]
)

#====================FINDING3==================#
# Derive the hour at which each tweet was created.

covid2$time_text <- as.character(covid2$created_at)

# The hour is the first two characters of the last 19 characters of the
# timestamp string. NOTE(review): this relies on a fixed-width timestamp
# tail -- confirm the `created_at` format.
covid2$hour <- left(right(covid2$time_text, 19), 2)
covid2$hour_r <- as.numeric(covid2$hour)
table(covid2$hour, covid2$hour_r)


# Shift the clock back by four hours and wrap around midnight, so that
# -1, -2, -3, -4 become 23, 22, 21, 20. The plots that use hour_r label
# the result as Eastern Time.
covid2$hour_r <- (covid2$hour_r - 4) %% 24


table(covid2$hour, covid2$hour_r)

library(ggplot2)

# Hourly histogram (24 bins) of tweets coded chinese_photo == 1.
# NOTE(review): hour_r is a plain numeric hour while scale_x_time() is a
# time scale -- confirm the axis renders as intended.
# NOTE(review): the label says n=2623, but the cross-tab recorded earlier in
# this script gives 365 + 2264 = 2629 for chinese_photo == 1 -- confirm.
ggplot(
  data = covid2[covid2$chinese_photo == 1, ],
  mapping = aes(x = hour_r)
) +
  stat_bin(bins = 24) +
  scale_x_time(breaks = c(0:23), labels = c(0:23)) +
  labs(x = "Time of the Chinese text-image tweets created (n=2623), Eastern Time Zone") +
  theme_bw()

# Same histogram for tweets with neither a Chinese photo nor Chinese text.
ggplot(
  data = covid2[covid2$chinese_photo == 0 & covid2$Chinese_use2 == 0, ],
  mapping = aes(x = hour_r)
) +
  stat_bin(bins = 24) +
  scale_x_time(breaks = c(0:23), labels = c(0:23)) +
  labs(x = "Time of the non-Chinese tweets created (n=605), Eastern Time Zone") +
  theme_bw()
